{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pylab as plt\n",
    "\n",
    "%matplotlib inline\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def load_text(path):\n",
    "    textdict = {}\n",
    "    with open(path) as f:\n",
    "        for line in f.readlines():\n",
    "            user = line.split(',')[0]\n",
    "            if user not in textdict:\n",
    "                textdict[user] = []\n",
    "            textdict[user] = line.split(',')[1].strip().split(' ')\n",
    "    \n",
    "    return textdict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "testdict = load_text('../raw/test.txt')\n",
    "news = pd.read_pickle('../pkl/process.news.pkl')\n",
    "all_news = pd.read_pickle('../pkl/process.all_news.pkl')\n",
    "news = pd.concat([all_news, news], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "nonlist = {}\n",
    "test_list = []\n",
    "for i in testdict:\n",
    "    for j in testdict[i]:\n",
    "        if news[news.item_id == int(j)].values.shape[0] == 0:\n",
    "            if j not in nonlist:\n",
    "                nonlist[j] = 0\n",
    "            nonlist[j] += 1\n",
    "        else:\n",
    "            test_list.append([ i, j, news[news.item_id == int(j)].values[0,1]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "test_list = np.array(test_list)\n",
    "test_df = pd.DataFrame(data=test_list, columns=['user_id','item_id','cate_id'])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "item_id                   557579\n",
       "cate_id                      1_6\n",
       "timestamp             1487424900\n",
       "date         2017-02-18 21:35:00\n",
       "Name: 557579, dtype: object"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# group = test_df.groupby(['cate_id', 'item_id']).count()\n",
    "# print(nonlist.__len__())\n",
    "whole_news = whole_news.drop_duplicates()\n",
    "whole_news.loc[557579]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sum</th>\n",
       "      <th>cate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>557579</th>\n",
       "      <td>212</td>\n",
       "      <td>1_6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558082</th>\n",
       "      <td>168</td>\n",
       "      <td>1_11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558788</th>\n",
       "      <td>156</td>\n",
       "      <td>1_6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>557167</th>\n",
       "      <td>134</td>\n",
       "      <td>1_6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558910</th>\n",
       "      <td>131</td>\n",
       "      <td>1_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>556664</th>\n",
       "      <td>122</td>\n",
       "      <td>1_8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552472</th>\n",
       "      <td>111</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>555820</th>\n",
       "      <td>106</td>\n",
       "      <td>1_6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558077</th>\n",
       "      <td>94</td>\n",
       "      <td>1_5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>557018</th>\n",
       "      <td>92</td>\n",
       "      <td>1_6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>556700</th>\n",
       "      <td>91</td>\n",
       "      <td>1_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558096</th>\n",
       "      <td>91</td>\n",
       "      <td>1_12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558498</th>\n",
       "      <td>89</td>\n",
       "      <td>1_11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558259</th>\n",
       "      <td>88</td>\n",
       "      <td>1_6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558768</th>\n",
       "      <td>85</td>\n",
       "      <td>1_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558625</th>\n",
       "      <td>78</td>\n",
       "      <td>1_6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558871</th>\n",
       "      <td>76</td>\n",
       "      <td>1_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558578</th>\n",
       "      <td>73</td>\n",
       "      <td>1_3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558331</th>\n",
       "      <td>73</td>\n",
       "      <td>1_3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552335</th>\n",
       "      <td>73</td>\n",
       "      <td>1_6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>472094</th>\n",
       "      <td>69</td>\n",
       "      <td>1_11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>556595</th>\n",
       "      <td>67</td>\n",
       "      <td>1_3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>556489</th>\n",
       "      <td>62</td>\n",
       "      <td>1_3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558592</th>\n",
       "      <td>62</td>\n",
       "      <td>1_27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>557193</th>\n",
       "      <td>61</td>\n",
       "      <td>1_6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558274</th>\n",
       "      <td>59</td>\n",
       "      <td>1_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>554513</th>\n",
       "      <td>58</td>\n",
       "      <td>1_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>557835</th>\n",
       "      <td>56</td>\n",
       "      <td>1_11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558773</th>\n",
       "      <td>54</td>\n",
       "      <td>1_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558588</th>\n",
       "      <td>53</td>\n",
       "      <td>1_3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>556519</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>556439</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559337</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559443</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559447</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559449</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>563023</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562794</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562785</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562769</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562688</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562267</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562247</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562069</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>561975</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>561789</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>561777</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>561753</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>561624</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>561146</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>560079</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559797</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559762</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559736</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559713</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559701</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559575</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559559</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559543</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>560896</th>\n",
       "      <td>1</td>\n",
       "      <td>1_27</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9843 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        sum  cate\n",
       "557579  212   1_6\n",
       "558082  168  1_11\n",
       "558788  156   1_6\n",
       "557167  134   1_6\n",
       "558910  131   1_1\n",
       "556664  122   1_8\n",
       "552472  111   1_2\n",
       "555820  106   1_6\n",
       "558077   94   1_5\n",
       "557018   92   1_6\n",
       "556700   91   1_1\n",
       "558096   91  1_12\n",
       "558498   89  1_11\n",
       "558259   88   1_6\n",
       "558768   85   1_1\n",
       "558625   78   1_6\n",
       "558871   76   1_1\n",
       "558578   73   1_3\n",
       "558331   73   1_3\n",
       "552335   73   1_6\n",
       "472094   69  1_11\n",
       "556595   67   1_3\n",
       "556489   62   1_3\n",
       "558592   62  1_27\n",
       "557193   61   1_6\n",
       "558274   59   1_1\n",
       "554513   58   1_1\n",
       "557835   56  1_11\n",
       "558773   54   1_1\n",
       "558588   53   1_3\n",
       "...     ...   ...\n",
       "556519    1   1_2\n",
       "556439    1   1_2\n",
       "559337    1   1_2\n",
       "559443    1   1_2\n",
       "559447    1   1_2\n",
       "559449    1   1_2\n",
       "563023    1   1_2\n",
       "562794    1   1_2\n",
       "562785    1   1_2\n",
       "562769    1   1_2\n",
       "562688    1   1_2\n",
       "562267    1   1_2\n",
       "562247    1   1_2\n",
       "562069    1   1_2\n",
       "561975    1   1_2\n",
       "561789    1   1_2\n",
       "561777    1   1_2\n",
       "561753    1   1_2\n",
       "561624    1   1_2\n",
       "561146    1   1_2\n",
       "560079    1   1_2\n",
       "559797    1   1_2\n",
       "559762    1   1_2\n",
       "559736    1   1_2\n",
       "559713    1   1_2\n",
       "559701    1   1_2\n",
       "559575    1   1_2\n",
       "559559    1   1_2\n",
       "559543    1   1_2\n",
       "560896    1  1_27\n",
       "\n",
       "[9843 rows x 2 columns]"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group = pd.read_pickle('../pkl/sort_test.pkl')\n",
    "g = group.loc['1_1'].sort_values('user_id', ascending=False)\n",
    "gt = group.sort_values('user_id', ascending=False)\n",
    "gt['item_id'] = [x[0] for x in gt.index]\n",
    "gt.index = [x[1] for x in gt.index]\n",
    "gt['sum'] = gt.user_id\n",
    "del gt['user_id']\n",
    "gt['cate'] = gt.item_id\n",
    "del gt['item_id']\n",
    "gt\n",
    "# pd.merge([gt, news], )\n",
    "# all_news.index = all_news['item_id']\n",
    "# news.index = news['item_id']\n",
    "# del all_news['item_id']\n",
    "# del news['item_id']\n",
    "# gt.join(news, how='outer')\n",
    "# whole_news  = pd.concat([all_news, news], axis=0)\n",
    "# whole_news.iloc['557579']\n",
    "# gt.join(whole_news).sort_values('sum', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sum</th>\n",
       "      <th>cate</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>557579</th>\n",
       "      <td>212</td>\n",
       "      <td>1_6</td>\n",
       "      <td>1487424900</td>\n",
       "      <td>2017-02-18 21:35:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558082</th>\n",
       "      <td>168</td>\n",
       "      <td>1_11</td>\n",
       "      <td>1487342792</td>\n",
       "      <td>2017-02-17 22:46:32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558788</th>\n",
       "      <td>156</td>\n",
       "      <td>1_6</td>\n",
       "      <td>1487364890</td>\n",
       "      <td>2017-02-18 04:54:50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>557167</th>\n",
       "      <td>134</td>\n",
       "      <td>1_6</td>\n",
       "      <td>1487337226</td>\n",
       "      <td>2017-02-17 21:13:46</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558910</th>\n",
       "      <td>131</td>\n",
       "      <td>1_1</td>\n",
       "      <td>1487370659</td>\n",
       "      <td>2017-02-18 06:30:59</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>556664</th>\n",
       "      <td>122</td>\n",
       "      <td>1_8</td>\n",
       "      <td>1487365200</td>\n",
       "      <td>2017-02-18 05:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552472</th>\n",
       "      <td>111</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487347260</td>\n",
       "      <td>2017-02-18 00:01:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>555820</th>\n",
       "      <td>106</td>\n",
       "      <td>1_6</td>\n",
       "      <td>1487404440</td>\n",
       "      <td>2017-02-18 15:54:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558077</th>\n",
       "      <td>94</td>\n",
       "      <td>1_5</td>\n",
       "      <td>1487342774</td>\n",
       "      <td>2017-02-17 22:46:14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>557018</th>\n",
       "      <td>92</td>\n",
       "      <td>1_6</td>\n",
       "      <td>1487336345</td>\n",
       "      <td>2017-02-17 20:59:05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>556700</th>\n",
       "      <td>91</td>\n",
       "      <td>1_1</td>\n",
       "      <td>1487334176</td>\n",
       "      <td>2017-02-17 20:22:56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558096</th>\n",
       "      <td>91</td>\n",
       "      <td>1_12</td>\n",
       "      <td>1487342870</td>\n",
       "      <td>2017-02-17 22:47:50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558498</th>\n",
       "      <td>89</td>\n",
       "      <td>1_11</td>\n",
       "      <td>1487355430</td>\n",
       "      <td>2017-02-18 02:17:10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558259</th>\n",
       "      <td>88</td>\n",
       "      <td>1_6</td>\n",
       "      <td>1487349238</td>\n",
       "      <td>2017-02-18 00:33:58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558768</th>\n",
       "      <td>85</td>\n",
       "      <td>1_1</td>\n",
       "      <td>1487364894</td>\n",
       "      <td>2017-02-18 04:54:54</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558625</th>\n",
       "      <td>78</td>\n",
       "      <td>1_6</td>\n",
       "      <td>1487358596</td>\n",
       "      <td>2017-02-18 03:09:56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558871</th>\n",
       "      <td>76</td>\n",
       "      <td>1_1</td>\n",
       "      <td>1487369718</td>\n",
       "      <td>2017-02-18 06:15:18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558578</th>\n",
       "      <td>73</td>\n",
       "      <td>1_3</td>\n",
       "      <td>1487357157</td>\n",
       "      <td>2017-02-18 02:45:57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558331</th>\n",
       "      <td>73</td>\n",
       "      <td>1_3</td>\n",
       "      <td>1487351195</td>\n",
       "      <td>2017-02-18 01:06:35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552335</th>\n",
       "      <td>73</td>\n",
       "      <td>1_6</td>\n",
       "      <td>1487376000</td>\n",
       "      <td>2017-02-18 08:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>472094</th>\n",
       "      <td>69</td>\n",
       "      <td>1_11</td>\n",
       "      <td>1486440129</td>\n",
       "      <td>2017-02-07 12:02:09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>556595</th>\n",
       "      <td>67</td>\n",
       "      <td>1_3</td>\n",
       "      <td>1487363400</td>\n",
       "      <td>2017-02-18 04:30:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>556489</th>\n",
       "      <td>62</td>\n",
       "      <td>1_3</td>\n",
       "      <td>1487347920</td>\n",
       "      <td>2017-02-18 00:12:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558592</th>\n",
       "      <td>62</td>\n",
       "      <td>1_27</td>\n",
       "      <td>1487357702</td>\n",
       "      <td>2017-02-18 02:55:02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>557193</th>\n",
       "      <td>61</td>\n",
       "      <td>1_6</td>\n",
       "      <td>1487337417</td>\n",
       "      <td>2017-02-17 21:16:57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558274</th>\n",
       "      <td>59</td>\n",
       "      <td>1_1</td>\n",
       "      <td>1487349831</td>\n",
       "      <td>2017-02-18 00:43:51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>554513</th>\n",
       "      <td>58</td>\n",
       "      <td>1_1</td>\n",
       "      <td>1487318692</td>\n",
       "      <td>2017-02-17 16:04:52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>557835</th>\n",
       "      <td>56</td>\n",
       "      <td>1_11</td>\n",
       "      <td>1487340910</td>\n",
       "      <td>2017-02-17 22:15:10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558773</th>\n",
       "      <td>54</td>\n",
       "      <td>1_1</td>\n",
       "      <td>1487364833</td>\n",
       "      <td>2017-02-18 04:53:53</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>558588</th>\n",
       "      <td>53</td>\n",
       "      <td>1_3</td>\n",
       "      <td>1487357435</td>\n",
       "      <td>2017-02-18 02:50:35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>556519</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487333024</td>\n",
       "      <td>2017-02-17 20:03:44</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>556439</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487332579</td>\n",
       "      <td>2017-02-17 19:56:19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559337</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487375009</td>\n",
       "      <td>2017-02-18 07:43:29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559443</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487375981</td>\n",
       "      <td>2017-02-18 07:59:41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559447</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487376022</td>\n",
       "      <td>2017-02-18 08:00:22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559449</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487376026</td>\n",
       "      <td>2017-02-18 08:00:26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>563023</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487396856</td>\n",
       "      <td>2017-02-18 13:47:36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562794</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487395926</td>\n",
       "      <td>2017-02-18 13:32:06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562785</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487395883</td>\n",
       "      <td>2017-02-18 13:31:23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562769</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487395837</td>\n",
       "      <td>2017-02-18 13:30:37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562688</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487395551</td>\n",
       "      <td>2017-02-18 13:25:51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562267</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487393581</td>\n",
       "      <td>2017-02-18 12:53:01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562247</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487393479</td>\n",
       "      <td>2017-02-18 12:51:19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562069</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487391091</td>\n",
       "      <td>2017-02-18 12:11:31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>561975</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487390027</td>\n",
       "      <td>2017-02-18 11:53:47</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>561789</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487389212</td>\n",
       "      <td>2017-02-18 11:40:12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>561777</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487389148</td>\n",
       "      <td>2017-02-18 11:39:08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>561753</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487389052</td>\n",
       "      <td>2017-02-18 11:37:32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>561624</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487388353</td>\n",
       "      <td>2017-02-18 11:25:53</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>561146</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487385989</td>\n",
       "      <td>2017-02-18 10:46:29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>560079</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487380661</td>\n",
       "      <td>2017-02-18 09:17:41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559797</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487378728</td>\n",
       "      <td>2017-02-18 08:45:28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559762</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487378442</td>\n",
       "      <td>2017-02-18 08:40:42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559736</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487378291</td>\n",
       "      <td>2017-02-18 08:38:11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559713</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487390400</td>\n",
       "      <td>2017-02-18 12:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559701</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487378064</td>\n",
       "      <td>2017-02-18 08:34:24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559575</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487377195</td>\n",
       "      <td>2017-02-18 08:19:55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559559</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487377036</td>\n",
       "      <td>2017-02-18 08:17:16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559543</th>\n",
       "      <td>1</td>\n",
       "      <td>1_2</td>\n",
       "      <td>1487376897</td>\n",
       "      <td>2017-02-18 08:14:57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>560896</th>\n",
       "      <td>1</td>\n",
       "      <td>1_27</td>\n",
       "      <td>1487384669</td>\n",
       "      <td>2017-02-18 10:24:29</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9843 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        sum  cate   timestamp                date\n",
       "557579  212   1_6  1487424900 2017-02-18 21:35:00\n",
       "558082  168  1_11  1487342792 2017-02-17 22:46:32\n",
       "558788  156   1_6  1487364890 2017-02-18 04:54:50\n",
       "557167  134   1_6  1487337226 2017-02-17 21:13:46\n",
       "558910  131   1_1  1487370659 2017-02-18 06:30:59\n",
       "556664  122   1_8  1487365200 2017-02-18 05:00:00\n",
       "552472  111   1_2  1487347260 2017-02-18 00:01:00\n",
       "555820  106   1_6  1487404440 2017-02-18 15:54:00\n",
       "558077   94   1_5  1487342774 2017-02-17 22:46:14\n",
       "557018   92   1_6  1487336345 2017-02-17 20:59:05\n",
       "556700   91   1_1  1487334176 2017-02-17 20:22:56\n",
       "558096   91  1_12  1487342870 2017-02-17 22:47:50\n",
       "558498   89  1_11  1487355430 2017-02-18 02:17:10\n",
       "558259   88   1_6  1487349238 2017-02-18 00:33:58\n",
       "558768   85   1_1  1487364894 2017-02-18 04:54:54\n",
       "558625   78   1_6  1487358596 2017-02-18 03:09:56\n",
       "558871   76   1_1  1487369718 2017-02-18 06:15:18\n",
       "558578   73   1_3  1487357157 2017-02-18 02:45:57\n",
       "558331   73   1_3  1487351195 2017-02-18 01:06:35\n",
       "552335   73   1_6  1487376000 2017-02-18 08:00:00\n",
       "472094   69  1_11  1486440129 2017-02-07 12:02:09\n",
       "556595   67   1_3  1487363400 2017-02-18 04:30:00\n",
       "556489   62   1_3  1487347920 2017-02-18 00:12:00\n",
       "558592   62  1_27  1487357702 2017-02-18 02:55:02\n",
       "557193   61   1_6  1487337417 2017-02-17 21:16:57\n",
       "558274   59   1_1  1487349831 2017-02-18 00:43:51\n",
       "554513   58   1_1  1487318692 2017-02-17 16:04:52\n",
       "557835   56  1_11  1487340910 2017-02-17 22:15:10\n",
       "558773   54   1_1  1487364833 2017-02-18 04:53:53\n",
       "558588   53   1_3  1487357435 2017-02-18 02:50:35\n",
       "...     ...   ...         ...                 ...\n",
       "556519    1   1_2  1487333024 2017-02-17 20:03:44\n",
       "556439    1   1_2  1487332579 2017-02-17 19:56:19\n",
       "559337    1   1_2  1487375009 2017-02-18 07:43:29\n",
       "559443    1   1_2  1487375981 2017-02-18 07:59:41\n",
       "559447    1   1_2  1487376022 2017-02-18 08:00:22\n",
       "559449    1   1_2  1487376026 2017-02-18 08:00:26\n",
       "563023    1   1_2  1487396856 2017-02-18 13:47:36\n",
       "562794    1   1_2  1487395926 2017-02-18 13:32:06\n",
       "562785    1   1_2  1487395883 2017-02-18 13:31:23\n",
       "562769    1   1_2  1487395837 2017-02-18 13:30:37\n",
       "562688    1   1_2  1487395551 2017-02-18 13:25:51\n",
       "562267    1   1_2  1487393581 2017-02-18 12:53:01\n",
       "562247    1   1_2  1487393479 2017-02-18 12:51:19\n",
       "562069    1   1_2  1487391091 2017-02-18 12:11:31\n",
       "561975    1   1_2  1487390027 2017-02-18 11:53:47\n",
       "561789    1   1_2  1487389212 2017-02-18 11:40:12\n",
       "561777    1   1_2  1487389148 2017-02-18 11:39:08\n",
       "561753    1   1_2  1487389052 2017-02-18 11:37:32\n",
       "561624    1   1_2  1487388353 2017-02-18 11:25:53\n",
       "561146    1   1_2  1487385989 2017-02-18 10:46:29\n",
       "560079    1   1_2  1487380661 2017-02-18 09:17:41\n",
       "559797    1   1_2  1487378728 2017-02-18 08:45:28\n",
       "559762    1   1_2  1487378442 2017-02-18 08:40:42\n",
       "559736    1   1_2  1487378291 2017-02-18 08:38:11\n",
       "559713    1   1_2  1487390400 2017-02-18 12:00:00\n",
       "559701    1   1_2  1487378064 2017-02-18 08:34:24\n",
       "559575    1   1_2  1487377195 2017-02-18 08:19:55\n",
       "559559    1   1_2  1487377036 2017-02-18 08:17:16\n",
       "559543    1   1_2  1487376897 2017-02-18 08:14:57\n",
       "560896    1  1_27  1487384669 2017-02-18 10:24:29\n",
       "\n",
       "[9843 rows x 4 columns]"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# gt.join(whole_news, how='outer')\n",
    "gt.index = [int(x) for x in gt.index]\n",
    "new_gt = gt.join(whole_news)\n",
    "del new_gt['item_id']\n",
    "del new_gt['cate_id']\n",
    "new_gt"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
